library(plotly)
library(dplyr)
# Load the property listings; the path is resolved against the current
# working directory.
df <- read.csv("Properties_philly_Kraggle_v2.csv")

# Keep only rows where sale price, school score and violent-crime rate are all
# present, retain the price and school-score columns under short names, and
# turn the price text into an integer.
ramka_cen <- df %>%
  filter(
    !is.na(Sale.Price.bid.price),
    !is.na(School.Score),
    !is.na(Violent.Crime.Rate)
  ) %>%
  select(
    cena_koncowa = Sale.Price.bid.price,
    ocena_szkol = School.Score
  ) %>%
  mutate(
    # Each step sees the result of the previous one: strip the dollar signs,
    # then the commas, then convert what is left to an integer.
    cena_koncowa = gsub("(\\$)", "", cena_koncowa),
    cena_koncowa = gsub("(,)", "", cena_koncowa),
    cena_koncowa = as.integer(cena_koncowa)
  )
# Percentile table of the cleaned sale prices: row k of `cena` holds the k-th
# percentile (1%, 2%, ..., 100%).
#
# Built directly instead of through summarise(): returning more than one row
# per group from summarise() is deprecated since dplyr 1.1.0 and emits a
# warning on every run. The result is still a data frame with a single `cena`
# column, so `centyle$cena` keeps working unchanged.
centyle <- data.frame(
  cena = unname(
    quantile(ramka_cen$cena_koncowa, probs = seq(0.01, 1, by = 0.01))
  )
)
# Index of the LAST element of `y` equal to its minimum (base which.min()
# would return the first such index instead).
which.min2 <- function(y) {
  smallest <- min(y)
  hits <- which(y == smallest)
  max(hits)
}
# Position in `b` of the element closest to `a` in absolute difference.
# When several elements are equally close, which.min2() picks the highest
# position among them.
ktory_kwantyl <- function(a, b) {
  distances <- abs(b - a)
  which.min2(distances)
}
# For every listing, find the percentile whose price is nearest to the
# listing's price, then bucket the percentile index into tens (10, 20, ...,
# 100). The result stays grouped by school score and bucket.
rameczka <- ramka_cen %>%
  mutate(
    # `centyle$cena` on the right-hand side is the percentile table defined
    # earlier in the script: `ramka_cen` has no column named `centyle` while
    # this expression is evaluated. vapply() guarantees exactly one number per
    # row instead of coercing a list after the fact.
    centyle = vapply(
      cena_koncowa, ktory_kwantyl, numeric(1),
      b = centyle$cena, USE.NAMES = FALSE
    ),
    # Round the percentile index up to the next multiple of ten and clamp to
    # [10, 100]: 1-10 -> 10, 11-20 -> 20, ..., 91-100 -> 100.
    grupa = pmin(pmax(ceiling(centyle / 10) * 10, 10), 100)
  ) %>%
  group_by(ocena_szkol, grupa)
# Animated horizontal box plot of school scores: one animation frame per price
# bucket (`grupa`), with a slider whose label shows the current bucket.
# Left as an unassigned top-level expression so the widget is displayed the
# same way as before.
plot_ly(
  data = rameczka,
  x = ~ocena_szkol,
  frame = ~grupa,
  type = "box"
) %>%
  layout(
    title = "Rozkład ocen szkół, w zależności od ceny nieruchomości",
    xaxis = list(title = "Ocena szkół"),
    yaxis = list(title = "")
  ) %>%
  animation_slider(
    currentvalue = list(
      prefix = "Centyl ceny: ",
      font = list(color = "black")
    )
  )